In this report, we extract information about published JOSS papers and generate graphics as well as a summary table that can be downloaded and used for further analyses.
## Attach all packages used in this report: data wrangling (tibble, dplyr,
## tidyr, purrr), API access (rcrossref, gh, jsonlite), dates (lubridate)
## and output (ggplot2, plotly, DT).
suppressPackageStartupMessages({
library(tibble)
library(rcrossref)
library(dplyr)
library(tidyr)
library(ggplot2)
library(lubridate)
library(gh)
library(purrr)
library(jsonlite)
library(DT)
library(plotly)
})
## Read archived version of summary data frame, to use for filling in
## information about software repositories (due to limit on API requests)
## Sort by the date when software repo info was last obtained
## Read the archived summary data frame from the gh-pages branch and order it
## so that entries whose repository info was obtained longest ago (or never,
## i.e. NA) come first; those entries are re-queried first further below.
papers_archive <- readRDS(gzcon(url("https://github.com/openjournals/joss-analytics/blob/gh-pages/joss_submission_analytics.rds?raw=true"))) %>%
  dplyr::arrange(!is.na(repo_info_obtained), repo_info_obtained)

## We get the information about published JOSS papers from Crossref, using the
## rcrossref R package. This package is also used to extract citation counts.
## Fetch JOSS papers from Crossref
## Only 1000 papers at the time can be pulled down
lim <- 1000
papers <- rcrossref::cr_works(filter = c(issn = "2475-9066"),
limit = lim)$data
## Knitted output from the call above:
## Warning: `tbl_df()` is deprecated as of dplyr 1.0.0.
## Please use `tibble::as_tibble()` instead.
## [90mThis warning is displayed once every 8 hours.[39m
## [90mCall `lifecycle::last_warnings()` to see where this warning was generated.[39m
## Fetch additional pages until a page comes back only partially filled,
## which signals that all records have been retrieved.
i <- 1
while (nrow(papers) == i * lim) {
  nxt <- rcrossref::cr_works(filter = c(issn = "2475-9066"),
                             limit = lim, offset = i * lim)$data
  papers <- dplyr::bind_rows(papers, nxt)
  i <- i + 1
}
## Restrict to journal articles (drops issue-level and other record types)
papers <- dplyr::filter(papers, type == "journal-article")

## A few papers don't have DOIs - generate them from the URL
missing_doi <- is.na(papers$alternative.id)
papers$alternative.id[missing_doi] <-
  gsub("http://dx.doi.org/", "", papers$url[missing_doi])

## Get citation info from Crossref and merge with paper details
cit <- rcrossref::cr_citation_count(doi = papers$alternative.id) %>%
  dplyr::rename(citation_count = count)
papers <- dplyr::left_join(papers, cit, by = c("alternative.id" = "doi"))

## Remove one duplicated paper
papers <- dplyr::filter(papers, alternative.id != "10.21105/joss.00688")
## Record that all columns present so far originate from Crossref
source_track <- c(source_track,
                  structure(rep("crossref", ncol(papers)),
                            names = colnames(papers)))

## For each published paper, we use the Whedon API to get information about
## pre-review and review issue numbers, corresponding software repository etc.
## Page through the Whedon "published papers" endpoint until an empty page
## is returned. A single repeat loop replaces the original pattern that
## duplicated the request code before and inside a while loop.
whedon <- list()
p <- 1
repeat {
  a <- jsonlite::fromJSON(
    url(paste0("https://joss.theoj.org/papers/published.json?page=", p)),
    simplifyDataFrame = FALSE
  )
  if (length(a) == 0) break
  whedon <- c(whedon, a)
  p <- p + 1
}
## Flatten the list of Whedon records into a data frame with one row per
## published paper
whedon <- do.call(dplyr::bind_rows, lapply(whedon, function(w) {
  data.frame(api_title = w$title,
             api_state = w$state,
             editor = paste(w$metadata$paper$editor, collapse = ","),
             reviewers = paste(w$reviewers, collapse = ","),
             nbr_reviewers = length(w$reviewers),
             repo_url = w$repository_url,
             review_issue_id = w$review_issue_id,
             doi = w$doi,
             ## meta_review_issue_id may be NULL (earliest submissions had no
             ## pre-review issue); the condition is scalar, so use if/else
             ## rather than ifelse()
             prereview_issue_id = if (!is.null(w$meta_review_issue_id)) {
               w$meta_review_issue_id
             } else {
               NA_integer_
             },
             languages = paste(w$metadata$paper$languages, collapse = ","),
             archive_doi = w$metadata$paper$archive_doi)
}))
## Merge the Whedon information into the papers table via the DOI
papers <- papers %>% dplyr::left_join(whedon, by = c("alternative.id" = "doi"))
## Record the origin ("whedon") of the columns added by the join
source_track <- c(source_track,
                  structure(rep("whedon", length(setdiff(colnames(papers),
                                                         names(source_track)))),
                            names = setdiff(colnames(papers), names(source_track))))

## From each pre-review and review issue, we extract information about review
## times and assigned labels.
## Pull down info on all issues in the joss-reviews repository
issues <- gh("/repos/openjournals/joss-reviews/issues",
             .limit = 5000, state = "all")
## From each issue, extract required information
iss <- do.call(dplyr::bind_rows, lapply(issues, function(i) {
  data.frame(title = i$title,
             number = i$number,
             state = i$state,
             opened = i$created_at,
             ## closed_at is NULL while an issue is still open; the condition
             ## is scalar, so use if/else rather than ifelse()
             closed = if (!is.null(i$closed_at)) i$closed_at else NA_character_,
             ncomments = i$comments,
             ## keep all labels except the generic workflow labels
             labels = paste(setdiff(
               vapply(i$labels, getElement,
                      name = "name", character(1L)),
               c("review", "pre-review", "query-scope", "paused")),
               collapse = ","))
}))
## Split into REVIEW, PRE-REVIEW, and other issues (the latter category
## is discarded)
issother <- iss %>% dplyr::filter(!grepl("\\[PRE REVIEW\\]", title) &
!grepl("\\[REVIEW\\]", title))
## Issues that are neither review nor pre-review (26 rows at knitting time)
dim(issother)
## [1] 26 7
## title number state
## 1 @torressa @poulson I only found a couple of small issues: 2082 closed
## 2 Request to regenerate final proof 2045 closed
## 3 issues running example program Karate 2015 closed
## 4 @whedon commands 1898 closed
## 5 @whedon 1897 closed
## 6 Updated issue template 1510 closed
## opened closed ncomments labels
## 1 2020-02-07T09:51:50Z 2020-02-07T09:52:09Z 2
## 2 2020-01-28T14:44:07Z 2020-01-28T14:45:26Z 2
## 3 2020-01-15T13:25:37Z 2020-01-15T15:05:18Z 3
## 4 2019-11-17T09:44:23Z 2019-11-17T10:26:41Z 4
## 5 2019-11-17T09:43:49Z 2019-11-17T10:26:30Z 4
## 6 2019-06-18T19:13:46Z 2019-06-18T19:43:30Z 0
## For REVIEW issues, generate the DOI of the paper from the issue number.
## Return the zeros needed to left-pad the issue number to five digits
## (e.g. 900 -> "00", so the DOI becomes 10.21105/joss.00900).
## strrep() with max(0, .) returns "" for numbers of five or more digits,
## whereas the original rep(0, 5 - nchar(s)) errored on a negative count
## once issue numbers exceed five digits.
getnbrzeros <- function(s) {
  strrep("0", max(0L, 5L - nchar(s)))
}
## REVIEW issues: the paper DOI is "10.21105/joss." followed by the issue
## number zero-padded to five digits. Strip the "[REVIEW]: " prefix from
## the title and tag all remaining columns with "review_".
issrev <- iss %>%
  dplyr::filter(grepl("\\[REVIEW\\]", title)) %>%
  dplyr::mutate(alternative.id = paste0("10.21105/joss.",
                                        purrr::map_chr(number, getnbrzeros),
                                        number)) %>%
  dplyr::mutate(title = gsub("\\[REVIEW\\]: ", "", title)) %>%
  dplyr::rename_at(vars(-alternative.id), ~ paste0("review_", .))
## PRE-REVIEW issues: keep only submissions that were neither withdrawn nor
## rejected; these carry the information about the corresponding REVIEW issue
isspre <- iss %>%
  dplyr::filter(grepl("\\[PRE REVIEW\\]", title),
                !grepl("withdrawn", labels),
                !grepl("rejected", labels))
## Some titles have multiple pre-review issues. In these cases, keep the
## latest (highest issue number), strip the title prefix and tag the columns.
isspre <- isspre %>%
  dplyr::arrange(desc(number)) %>%
  dplyr::filter(!duplicated(title)) %>%
  dplyr::mutate(title = gsub("\\[PRE REVIEW\\]: ", "", title)) %>%
  dplyr::rename_all(~ paste0("prerev_", .))
## Attach the review and pre-review issue information to each paper, convert
## the issue open/close timestamps to Date, and derive the time spent in each
## stage plus a flag for whether the submission reached the review stage.
papers <- papers %>% dplyr::left_join(issrev, by = "alternative.id") %>%
dplyr::left_join(isspre, by = c("prereview_issue_id" = "prerev_number")) %>%
dplyr::mutate(prerev_opened = as.Date(prerev_opened),
prerev_closed = as.Date(prerev_closed),
review_opened = as.Date(review_opened),
review_closed = as.Date(review_closed)) %>%
dplyr::mutate(days_in_pre = prerev_closed - prerev_opened,
days_in_rev = review_closed - review_opened,
to_review = !is.na(review_opened))
## Record the origin ("joss-github") of the newly added columns
source_track <- c(source_track,
structure(rep("joss-github", length(setdiff(colnames(papers),
names(source_track)))),
names = setdiff(colnames(papers), names(source_track))))
## Reorder so that software repositories that were interrogated longest
## ago are checked first
## Papers absent from the archive (match() gives NA) are placed first, so
## they are queried before previously seen ones
tmporder <- order(match(papers$alternative.id, papers_archive$alternative.id),
na.last = FALSE)
software_urls <- papers$repo_url[tmporder]
## Only GitHub-hosted repositories can be queried through the GitHub API;
## examples of non-GitHub hosts appear in the output below
is_github <- grepl("github", software_urls)
length(is_github)
## [1] 1015
## [1] 973
## [1] "https://bitbucket.org/cmutel/brightway2"
## [2] "https://bitbucket.org/cloopsy/android/"
## [3] "https://bitbucket.org/manuela_s/hcp/"
## [4] "https://doi.org/10.17605/OSF.IO/3DS6A"
## [5] "https://bitbucket.org/glotzer/rowan"
## [6] "https://gitlab.com/moorepants/skijumpdesign"
## [7] "https://gitlab.com/toposens/public/ros-packages"
## [8] "https://gitlab.inria.fr/azais/treex"
## [9] "https://bitbucket.org/basicsums/basicsums"
## [10] "https://savannah.nongnu.org/projects/complot/"
## [11] "http://mutabit.com/repos.fossil/grafoscopio/"
## [12] "https://bitbucket.org/cardosan/brightway2-temporalis"
## [13] "https://bitbucket.org/cdegroot/wediff"
## [14] "https://bitbucket.org/meg/cbcbeat"
## [15] "https://vcs.ynic.york.ac.uk/analysis/sails"
## [16] "https://bitbucket.org/ocellarisproject/ocellaris"
## [17] "https://gitlab.com/QComms/cqptoolkit"
## [18] "https://gitlab.com/dlr-dw/ontocode"
## [19] "https://gitlab.com/eidheim/Simple-Web-Server"
## [20] "https://bitbucket.org/dghoshal/frieda"
## [21] "https://gitlab.com/tesch1/cppduals"
## [22] "https://gitlab.com/gdetor/genetic_alg"
## [23] "https://bitbucket.org/hammurabicode/hamx"
## [24] "https://gitlab.com/datafold-dev/datafold/"
## [25] "https://bitbucket.org/likask/mofem-cephas"
## [26] "https://www.idpoisson.fr/fullswof/"
## [27] "https://sourceforge.net/p/mcapl/mcapl_code/ci/master/tree/"
## [28] "https://gricad-gitlab.univ-grenoble-alpes.fr/ttk/spam/"
## [29] "https://c4science.ch/source/tamaas/"
## [30] "https://gitlab.inria.fr/miet/miet"
## [31] "https://bitbucket.org/mpi4py/mpi4py-fft"
## [32] "https://gitlab.com/myqueue/myqueue"
## [33] "https://gitlab.com/cerfacs/batman"
## [34] "https://gitlab.com/materials-modeling/wulffpack"
## [35] "https://bitbucket.org/dolfin-adjoint/pyadjoint"
## [36] "https://gitlab.com/costrouc/pysrim"
## [37] "https://bitbucket.org/rram/dvrlib/src/joss/"
## [38] "https://ts-gitlab.iup.uni-heidelberg.de/dorie/dorie"
## [39] "https://gitlab.com/davidtourigny/dynamic-fba"
## [40] "https://gitlab.com/celliern/scikit-fdiff/"
## [41] "https://ts-gitlab.iup.uni-heidelberg.de/utopia/utopia"
## [42] "https://ts-gitlab.iup.uni-heidelberg.de/utopia/dantro"
## Query the GitHub API for each GitHub-hosted software repository and collect
## metadata: creation/update/push dates, stars, main language, license and
## contributor counts. Failed requests yield NULL rows (filled from the
## archived data frame further below).
df <- do.call(dplyr::bind_rows, lapply(software_urls[is_github], function(u) {
## Normalize the URL: drop trailing slash and ".git" suffix, force https
u0 <- gsub("^http://", "https://", gsub("\\.git$", "", gsub("/$", "", u)))
## Strip branch/path components so that only owner/repo remains
if (grepl("/tree/", u0)) {
u0 <- strsplit(u0, "/tree/")[[1]][1]
}
if (grepl("/blob/", u0)) {
u0 <- strsplit(u0, "/blob/")[[1]][1]
}
## try() so that a failed request does not abort the whole loop
info <- try({
gh(gsub("(https://)?(www.)?github.com/", "/repos/", u0))
})
contribs <- try({
gh(paste0(gsub("(https://)?(www.)?github.com/", "/repos/", u0), "/contributors"),
.limit = 500)
})
if (!is(info, "try-error") && length(info) > 1) {
if (!is(contribs, "try-error")) {
if (length(contribs) == 0) {
repo_nbr_contribs <- repo_nbr_contribs_2ormore <- NA_integer_
} else {
repo_nbr_contribs <- length(contribs)
## NOTE(review): the logical results of the comparison are coerced into
## the NA_integer_ template by vapply(); presumably x$contributions is
## always present in the API response - confirm, since a NULL value
## would make vapply() error here rather than return NA
repo_nbr_contribs_2ormore <- sum(vapply(contribs, function(x) x$contributions >= 2, NA_integer_))
## Diagnostic output for unexpected NA in the contributor counts
if (is.na(repo_nbr_contribs_2ormore)) {
print(contribs)
}
}
} else {
repo_nbr_contribs <- repo_nbr_contribs_2ormore <- NA_integer_
}
data.frame(repo_url = u,
repo_created = info$created_at,
repo_updated = info$updated_at,
repo_pushed = info$pushed_at,
repo_nbr_stars = info$stargazers_count,
repo_language = ifelse(!is.null(info$language),
info$language, NA_character_),
repo_license = ifelse(!is.null(info$license),
info$license$key, NA_character_),
repo_nbr_contribs = repo_nbr_contribs,
repo_nbr_contribs_2ormore = repo_nbr_contribs_2ormore
)
} else {
## Request failed or repository inaccessible; row is filled from archive
NULL
}
})) %>%
## Keep dates only (not timestamps), deduplicate, and stamp the query date
dplyr::mutate(repo_created = as.Date(repo_created),
repo_updated = as.Date(repo_updated),
repo_pushed = as.Date(repo_pushed)) %>%
dplyr::distinct() %>%
dplyr::mutate(repo_info_obtained = lubridate::today())
## Each repo URL must occur at most once after distinct()
stopifnot(length(unique(df$repo_url)) == length(df$repo_url))
dim(df)
## For papers not in df (i.e., for which we didn't get a valid response
## from the GitHub API query), use information from the archived data frame
dfarchive <- papers_archive %>%
dplyr::select(colnames(df)) %>%
dplyr::filter(!(repo_url %in% df$repo_url))
df <- dplyr::bind_rows(df, dfarchive)
## Attach the repository metadata to the papers table
papers <- papers %>% dplyr::left_join(df, by = "repo_url")
## Record the origin ("sw-github") of the newly added columns
source_track <- c(source_track,
structure(rep("sw-github", length(setdiff(colnames(papers),
names(source_track)))),
names = setdiff(colnames(papers), names(source_track))))
## Convert publication date to Date format
## Add information about the half year (H1, H2) of publication
## Count number of authors
## (list-columns reference/license/link are dropped as they are not needed)
papers <- papers %>% dplyr::select(-reference, -license, -link) %>%
  dplyr::mutate(published.date = as.Date(published.print)) %>%
  dplyr::mutate(
    halfyear = paste0(year(published.date),
                      ifelse(month(published.date) <= 6, "H1", "H2"))
  ) %>% dplyr::mutate(
    halfyear = factor(halfyear,
                      levels = paste0(rep(sort(unique(year(published.date))),
                                          each = 2), c("H1", "H2")))
  ) %>% dplyr::mutate(nbr_authors = vapply(author, function(a) nrow(a), NA_integer_))
## Record the origin ("cleanup") of the newly added columns
source_track <- c(source_track,
                  structure(rep("cleanup", length(setdiff(colnames(papers),
                                                          names(source_track)))),
                            names = setdiff(colnames(papers), names(source_track))))

## In some cases, fetching information from (e.g.) the GitHub API fails for a
## subset of the publications. There are also other reasons for missing values
## (for example, the earliest submissions do not have an associated pre-review
## issue). The table below lists the number of missing values for each of the
## variables in the data frame.
## Number of published papers per month
ggplot(papers %>%
         dplyr::mutate(pubmonth = lubridate::floor_date(published.date, "month")) %>%
         dplyr::group_by(pubmonth) %>%
         dplyr::summarize(npub = n()),
       aes(x = factor(pubmonth), y = npub)) +
  geom_bar(stat = "identity") + theme_minimal() +
  labs(x = "", y = "Number of published papers per month") +
  theme(axis.title = element_text(size = 15),
        axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

## Number of published papers per year
ggplot(papers %>%
         dplyr::mutate(pubyear = lubridate::year(published.date)) %>%
         dplyr::group_by(pubyear) %>%
         dplyr::summarize(npub = n()),
       aes(x = factor(pubyear), y = npub)) +
  geom_bar(stat = "identity") + theme_minimal() +
  labs(x = "", y = "Number of published papers per year") +
  theme(axis.title = element_text(size = 15),
        axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

## Papers with 20 or more citations are grouped in the ">=20" category.
## Distribution of Crossref citation counts; replacing counts >= 20 with the
## string ">=20" coerces the column to character, which the factor levels
## below account for
ggplot(papers %>%
         dplyr::mutate(citation_count = replace(citation_count,
                                                citation_count >= 20, ">=20")) %>%
         dplyr::mutate(citation_count = factor(citation_count,
                                               levels = c(0:20, ">=20"))) %>%
         dplyr::group_by(citation_count) %>%
         dplyr::tally(),
       aes(x = citation_count, y = n)) +
  geom_bar(stat = "identity") +
  theme_minimal() +
  labs(x = "Crossref citation count", y = "Number of publications")

## The table below sorts the JOSS papers in decreasing order by the number
## of citations in Crossref.
## Interactive scatter plot of citation count against publication date,
## with a smoothed trend line
plotly::ggplotly(
  ggplot(papers, aes(x = published.date, y = citation_count, label = title)) +
    geom_point(alpha = 0.5) + theme_bw() + scale_y_sqrt() +
    geom_smooth() +
    labs(x = "Date of publication", y = "Crossref citation count") +
    theme(axis.title = element_text(size = 15)),
  tooltip = c("label", "x", "y")
)

## Here, we plot the citation count for all papers published within each
## half year, sorted in decreasing order.
## Citation counts per half year; within each panel, papers are sorted in
## decreasing order of citations
ggplot(papers %>% dplyr::group_by(halfyear) %>%
         dplyr::arrange(desc(citation_count)) %>%
         dplyr::mutate(idx = seq_along(citation_count)),
       aes(x = idx, y = citation_count)) +
  geom_point(alpha = 0.5) +
  facet_wrap(~ halfyear, scales = "free") +
  theme_bw() +
  labs(x = "Index", y = "Crossref citation count")

## In these plots we investigate whether the time a submission spends in the
## pre-review or review stage has changed over time.
## Days spent in pre-review, by pre-review opening date
ggplot(papers, aes(x = prerev_opened, y = as.numeric(days_in_pre))) +
  geom_point() + geom_smooth() + theme_bw() +
  scale_y_sqrt() +
  labs(x = "Date of pre-review opening", y = "Number of days in pre-review") +
  theme(axis.title = element_text(size = 15))

## Days spent in review, by review opening date
ggplot(papers, aes(x = review_opened, y = as.numeric(days_in_rev))) +
  geom_point() + geom_smooth() + theme_bw() +
  scale_y_sqrt() +
  labs(x = "Date of review opening", y = "Number of days in review") +
  theme(axis.title = element_text(size = 15))

## Next, we consider the languages used by the submissions. Note that a given
## submission can use multiple languages.
## Languages used by the submissions; a submission can use several languages
sspl <- strsplit(papers$languages, ",")
all_languages <- unique(unlist(sspl))
## Count, for each language, the number of submissions that use it
langs <- do.call(dplyr::bind_rows, lapply(all_languages, function(l) {
data.frame(language = l,
nbr_submissions = sum(vapply(sspl, function(v) l %in% v, 0)))
}))
langs %>% dplyr::arrange(desc(nbr_submissions))
## language nbr_submissions
## 1 TeX 862
## 2 Python 576
## 3 R 261
## 4 Shell 260
## 5 Makefile 191
## 6 C++ 149
## 7 HTML 123
## 8 Jupyter Notebook 115
## 9 C 75
## 10 CSS 66
## 11 CMake 58
## 12 Batchfile 55
## 13 JavaScript 55
## 14 Matlab 48
## 15 Julia 33
## 16 Fortran 24
## 17 Ruby 19
## 18 Java 19
## 19 PowerShell 18
## 20 Perl 11
## 21 Rebol 10
## 22 M4 7
## 23 Go 7
## 24 GLSL 7
## 25 C# 6
## 26 Roff 6
## 27 M 6
## 28 Cuda 5
## 29 Rust 5
## 30 Prolog 4
## 31 PHP 4
## 32 Smarty 4
## 33 Mathematica 4
## 34 IDL 3
## 35 Scala 3
## 36 Groovy 3
## 37 QMake 3
## 38 Objective-C 3
## 39 Awk 2
## 40 NSIS 2
## 41 Smalltalk 2
## 42 Assembly 2
## 43 Stan 2
## 44 XSLT 2
## 45 IGOR Pro 2
## 46 Vue 2
## 47 OCaml 2
## 48 Tcl 2
## 49 GAP 2
## 50 Lua 2
## 51 AGS Script 1
## 52 PLSQL 1
## 53 Common Lisp 1
## 54 eC 1
## 55 Lasso 1
## 56 Gnuplot 1
## 57 Stata 1
## 58 wdl 1
## 59 Kotlin 1
## 60 Inno Setup 1
## 61 Logos 1
## 62 Yacc 1
## 63 CoffeeScript 1
## 64 F# 1
## 65 XQuery 1
## 66 D 1
## 67 Golo 1
## 68 Scheme 1
## 69 Puppet 1
## 70 HyPhy 1
## 71 FreeMarker 1
## 72 UnrealScript 1
## 73 ANTLR 1
## 74 SourcePawn 1
## 75 Max 1
## 76 QML 1
## 77 JSONiq 1
## 78 TypeScript 1
## 79 Mako 1
## 80 Emacs Lisp 1
## 81 Hack 1
## Bar plot of the number of submissions per language, most used first
ggplot(langs %>% dplyr::arrange(desc(nbr_submissions)) %>%
         dplyr::mutate(language = factor(language, levels = language)),
       aes(x = language, y = nbr_submissions)) +
  geom_bar(stat = "identity") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(x = "", y = "Number of submissions") +
  theme(axis.title = element_text(size = 15))

## Interactive plot of GitHub stars against Crossref citation count
ggplotly(
  ggplot(papers, aes(x = citation_count, y = repo_nbr_stars,
                     label = title)) +
    geom_point(alpha = 0.5) + scale_x_sqrt() + scale_y_sqrt() +
    theme_bw() +
    labs(x = "Crossref citation count", y = "Number of stars, GitHub repo") +
    theme(axis.title = element_text(size = 15)),
  tooltip = c("label", "x", "y")
)

## Time from repository creation to the start of the JOSS pre-review
ggplot(papers, aes(x = as.numeric(prerev_opened - repo_created))) +
  geom_histogram(bins = 50) +
  theme_bw() +
  labs(x = "Time (days) from repo creation to JOSS pre-review start") +
  theme(axis.title = element_text(size = 15))

## Time from closure of the JOSS review to the most recent push to the repo
ggplot(papers, aes(x = as.numeric(repo_pushed - review_closed))) +
  geom_histogram(bins = 50) +
  theme_bw() +
  labs(x = "Time (days) from closure of JOSS review to most recent commit in repo") +
  theme(axis.title = element_text(size = 15)) +
  facet_wrap(~ year(published.date), scales = "free_y")

## Submissions associated with rOpenSci and pyOpenSci are not considered
## below, since they are not explicitly reviewed at JOSS.
## Number of reviewers per submission and publication year; rOpenSci and
## pyOpenSci submissions are excluded since they are not reviewed at JOSS
ggplot(papers %>%
         dplyr::filter(!grepl("rOpenSci|pyOpenSci", prerev_labels)) %>%
         dplyr::mutate(year = year(published.date)),
       aes(x = nbr_reviewers)) + geom_bar() +
  facet_wrap(~ year) + theme_bw() +
  labs(x = "Number of reviewers", y = "Number of submissions")

## Submissions associated with rOpenSci and pyOpenSci are not considered
## here, since they are not explicitly reviewed at JOSS.
## One row per (reviewer, publication year); rOpenSci/pyOpenSci excluded
reviewers <- papers %>%
  dplyr::filter(!grepl("rOpenSci|pyOpenSci", prerev_labels)) %>%
  dplyr::mutate(year = year(published.date)) %>%
  dplyr::select(reviewers, year) %>%
  tidyr::separate_rows(reviewers, sep = ",")
## Most active reviewers
DT::datatable(
  reviewers %>% dplyr::group_by(reviewers) %>%
    dplyr::summarize(nbr_reviews = length(year),
                     timespan = paste(unique(c(min(year), max(year))),
                                      collapse = " - ")) %>%
    dplyr::arrange(desc(nbr_reviews)),
  escape = FALSE, rownames = FALSE,
  options = list(scrollX = TRUE)
)

## Number of submissions per editor and year, colored by whether the
## submission came via rOpenSci/pyOpenSci
ggplot(papers %>%
         dplyr::mutate(year = year(published.date),
                       `r/pyOpenSci` = factor(
                         grepl("rOpenSci|pyOpenSci", prerev_labels),
                         levels = c("TRUE", "FALSE"))),
       aes(x = editor)) + geom_bar(aes(fill = `r/pyOpenSci`)) +
  theme_bw() + facet_wrap(~ year, ncol = 1) +
  scale_fill_manual(values = c(`TRUE` = "grey65", `FALSE` = "grey35")) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(x = "Editor", y = "Number of submissions")

## Order license levels: apache, bsd, mit, gpl, mpl first, then all
## remaining licenses in alphabetical order
all_licenses <- sort(unique(papers$repo_license))
license_levels <- c(grep("apache", all_licenses, value = TRUE),
                    grep("bsd", all_licenses, value = TRUE),
                    grep("mit", all_licenses, value = TRUE),
                    grep("gpl", all_licenses, value = TRUE),
                    grep("mpl", all_licenses, value = TRUE))
license_levels <- c(license_levels, setdiff(all_licenses, license_levels))

## License distribution per publication year
ggplot(papers %>%
         dplyr::mutate(repo_license = factor(repo_license,
                                             levels = license_levels)),
       aes(x = repo_license)) +
  geom_bar() +
  theme_bw() +
  labs(x = "Software license", y = "Number of submissions") +
  theme(axis.title = element_text(size = 15),
        axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  facet_wrap(~ year(published.date), scales = "free_y")

## For plots below, replace licenses present in less
## than 2.5% of the submissions by 'other'
tbl <- table(papers$repo_license)
to_replace <- names(tbl[tbl <= 0.025 * nrow(papers)])

## Number of submissions per license and year (stacked area plot)
ggplot(papers %>%
         dplyr::mutate(year = year(published.date)) %>%
         dplyr::mutate(repo_license = replace(repo_license,
                                              repo_license %in% to_replace,
                                              "other")) %>%
         dplyr::mutate(year = factor(year),
                       repo_license = factor(
                         repo_license,
                         levels = license_levels[license_levels %in% repo_license]
                       )) %>%
         dplyr::group_by(year, repo_license, .drop = FALSE) %>%
         dplyr::count() %>%
         dplyr::mutate(year = as.integer(as.character(year))),
       aes(x = year, y = n, fill = repo_license)) + geom_area() +
  theme_minimal() +
  scale_fill_brewer(palette = "Set1", name = "Software\nlicense",
                    na.value = "grey") +
  theme(axis.title = element_text(size = 15)) +
  labs(x = "Year", y = "Number of submissions")

## Fraction of submissions per license and year (stacked area plot)
ggplot(papers %>%
         dplyr::mutate(year = year(published.date)) %>%
         dplyr::mutate(repo_license = replace(repo_license,
                                              repo_license %in% to_replace,
                                              "other")) %>%
         dplyr::mutate(year = factor(year),
                       repo_license = factor(
                         repo_license,
                         levels = license_levels[license_levels %in% repo_license]
                       )) %>%
         dplyr::group_by(year, repo_license, .drop = FALSE) %>%
         dplyr::summarize(n = n()) %>%
         dplyr::mutate(freq = n/sum(n)) %>%
         dplyr::mutate(year = as.integer(as.character(year))),
       aes(x = year, y = freq, fill = repo_license)) + geom_area() +
  theme_minimal() +
  scale_fill_brewer(palette = "Set1", name = "Software\nlicense",
                    na.value = "grey") +
  theme(axis.title = element_text(size = 15)) +
  labs(x = "Year", y = "Fraction of submissions")

## The tibble object with all data collected above is serialized to a file
## that can be downloaded and reused. To read the current version of this
## file directly from GitHub, use the following code:
## Read the serialized summary table directly from the gh-pages branch
papers <- readRDS(gzcon(url("https://github.com/openjournals/joss-analytics/blob/gh-pages/joss_submission_analytics.rds?raw=true")))
## alternative.id container.title created deposited
## 1 10.21105/joss.00900 Journal of Open Source Software 2018-09-23 2018-09-23
## 2 10.21105/joss.00236 The Journal of Open Source Software 2017-04-19 2019-09-21
## 3 10.21105/joss.02581 Journal of Open Source Software 2020-08-26 2020-08-26
## 4 10.21105/joss.02520 Journal of Open Source Software 2020-08-26 2020-08-26
## 5 10.21105/joss.01423 Journal of Open Source Software 2019-05-08 2019-11-17
## 6 10.21105/joss.01614 Journal of Open Source Software 2019-08-20 2019-11-17
## published.print doi indexed issn issue issued
## 1 2018-09-23 10.21105/joss.00900 2020-03-10 2475-9066 29 2018-09-23
## 2 2017-04-19 10.21105/joss.00236 2020-08-26 2475-9066 12 2017-04-19
## 3 2020-08-26 10.21105/joss.02581 2020-08-26 2475-9066 52 2020-08-26
## 4 2020-08-26 10.21105/joss.02520 2020-08-26 2475-9066 52 2020-08-26
## 5 2019-05-08 10.21105/joss.01423 2020-04-07 2475-9066 37 2019-05-08
## 6 2019-08-20 10.21105/joss.01614 2020-02-14 2475-9066 40 2019-08-20
## member page prefix publisher reference.count score source
## 1 8722 900 10.21105 The Open Journal 9 1 Crossref
## 2 8722 236 10.21105 The Open Journal 12 1 Crossref
## 3 8722 2581 10.21105 The Open Journal 10 1 Crossref
## 4 8722 2520 10.21105 The Open Journal 11 1 Crossref
## 5 8722 1423 10.21105 The Open Journal 9 1 Crossref
## 6 8722 1614 10.21105 The Open Journal 7 1 Crossref
## title
## 1 GB code: A grain boundary generation code
## 2 Brightway: An open source framework for Life Cycle Assessment
## 3 SALSA: A Python Package for Constructing Synthetic Quasar Absorption Line Catalogs from Astrophysical Hydrodynamic Simulations
## 4 Gridap: An extensible Finite Element toolbox in Julia
## 5 CRED: a rapid peak caller for Chem-seq data
## 6 drms: A Python package for accessing HMI and AIA data
## type url volume
## 1 journal-article http://dx.doi.org/10.21105/joss.00900 3
## 2 journal-article http://dx.doi.org/10.21105/joss.00236 2
## 3 journal-article http://dx.doi.org/10.21105/joss.02581 5
## 4 journal-article http://dx.doi.org/10.21105/joss.02520 5
## 5 journal-article http://dx.doi.org/10.21105/joss.01423 4
## 6 journal-article http://dx.doi.org/10.21105/joss.01614 4
## author
## 1 http://orcid.org/0000-0002-9616-4602, http://orcid.org/0000-0003-4281-5665, NA, FALSE, FALSE, NA, R., B., J., Hadian, Grabowski, Neugebauer, first, additional, additional
## 2 http://orcid.org/0000-0002-7898-9862, FALSE, Chris, Mutel, first
## 3 http://orcid.org/0000-0003-0872-7098, NA, NA, NA, NA, NA, FALSE, NA, NA, NA, NA, NA, Brendan, Devin, Brian, Jason, Molly, Nicholas, Boyd, Silvia, O’Shea, Tumlinson, Peeples, Earl, first, additional, additional, additional, additional, additional
## 4 http://orcid.org/0000-0003-2391-4086, http://orcid.org/0000-0003-3667-443X, FALSE, FALSE, Santiago, Francesc, Badia, Verdugo, first, additional
## 5 http://orcid.org/0000-0002-8086-3185, http://orcid.org/0000-0003-2358-7919, http://orcid.org/0000-0002-0916-7339, http://orcid.org/0000-0002-3992-5399, FALSE, FALSE, FALSE, FALSE, Jason, Tony, Paul, Hiroki, Lin, Kuo, Horton, Nagase, first, additional, additional, additional
## 6 http://orcid.org/0000-0002-1361-5712, http://orcid.org/0000-0002-5662-9604, http://orcid.org/0000-0001-6915-4583, http://orcid.org/0000-0002-0361-6463, http://orcid.org/0000-0003-4217-4642, FALSE, FALSE, FALSE, FALSE, FALSE, Kolja, Monica, Nitin, Arthur, Stuart, Glogowski, Bobra, Choudhary, Amezcua, Mumford, first, additional, additional, additional, additional
## citation_count
## 1 1
## 2 39
## 3 0
## 4 0
## 5 1
## 6 2
## api_title
## 1 GB code: A grain boundary generation code
## 2 Brightway: An open source framework for Life Cycle Assessment
## 3 SALSA: A Python Package for Constructing Synthetic Quasar Absorption Line Catalogs from Astrophysical Hydrodynamic Simulations
## 4 Gridap: An extensible Finite Element toolbox in Julia
## 5 CRED: a rapid peak caller for Chem-seq data
## 6 drms: A Python package for accessing HMI and AIA data
## api_state editor reviewers
## 1 accepted @labarba @vyasr,@trallard
## 2 accepted @katyhuff @amoeba
## 3 accepted @danielskatz @olebole,@zpace
## 4 accepted @Kevin-Mattheus-Moerman @PetrKryslUCSD,@TeroFrondelius
## 5 accepted @lpantano @darogan
## 6 accepted @xuanxu @mgckind,@aureliocarnero
## nbr_reviewers repo_url review_issue_id
## 1 2 https://github.com/oekosheri/GB_code 900
## 2 1 https://bitbucket.org/cmutel/brightway2 236
## 3 2 https://github.com/biboyd/SALSA 2581
## 4 2 https://github.com/gridap/Gridap.jl 2520
## 5 1 https://github.com/jlincbio/cred 1423
## 6 2 https://github.com/sunpy/drms 1614
## prereview_issue_id languages
## 1 853 Python,TeX
## 2 228 Python,Shell,Jupyter Notebook,HTML,TeX
## 3 2532 Jupyter Notebook,TeX,Shell,Python
## 4 2464 Julia,Shell,TeX
## 5 1374 Makefile,Perl,C,TeX
## 6 1559 Python,TeX
## archive_doi
## 1 https://doi.org/10.5281/zenodo.1433530
## 2 http://dx.doi.org/10.5281/zenodo.556145
## 3 https://doi.org/10.5281/zenodo.4002067
## 4 https://doi.org/10.5281/zenodo.3999839
## 5 https://doi.org/10.5281/zenodo.2667613
## 6 https://doi.org/10.5281/zenodo.3369966
## review_title
## 1 GB_code: A grain boundary generation code
## 2 Brightway: An open source framework for life cycle assessment
## 3 SALSA: A Python Package for Constructing Synthetic Quasar Absorption Line Catalogs from Astrophysical Hydrodynamic Simulations
## 4 Gridap: An extensible Finite Element toolbox in Julia
## 5 CRED: a rapid peak caller for Chem-seq data
## 6 drms: A Python package for accessing HMI and AIA data
## review_number review_state review_opened review_closed review_ncomments
## 1 900 closed 2018-08-17 2018-09-23 90
## 2 236 closed 2017-04-13 2017-04-19 18
## 3 2581 closed 2020-08-18 2020-08-26 38
## 4 2520 closed 2020-07-26 2020-08-26 74
## 5 1423 closed 2019-05-01 2019-05-08 93
## 6 1614 closed 2019-08-01 2019-08-20 62
## review_labels
## 1 accepted,published,recommend-accept
## 2 accepted,published,recommend-accept
## 3 Jupyter Notebook,Shell,TeX,accepted,published,recommend-accept
## 4 Julia,TeX,accepted,published,recommend-accept
## 5 accepted,published,recommend-accept
## 6 accepted,published,recommend-accept
## prerev_title
## 1 GB_code: A grain boundary generation code
## 2 Brightway: An open source framework for life cycle assessment
## 3 SALSA: A Python Package for Constructing Synthetic Quasar Absorption Line Catalogs from Astrophysical Hydrodynamic Simulations
## 4 Gridap: An extensible Finite Element toolbox in Julia
## 5 CRED: a rapid peak caller for Chem-seq data
## 6 drms: A Python package for accessing HMI and AIA data
## prerev_state prerev_opened prerev_closed prerev_ncomments
## 1 closed 2018-07-26 2018-08-17 42
## 2 closed 2017-04-06 2017-04-13 14
## 3 closed 2020-07-28 2020-08-18 44
## 4 closed 2020-07-10 2020-07-26 47
## 5 closed 2019-04-14 2019-05-01 30
## 6 closed 2019-07-11 2019-08-01 29
## prerev_labels days_in_pre days_in_rev to_review repo_created
## 1 Python,TeX 22 days 37 days TRUE 2018-07-12
## 2 7 days 6 days TRUE <NA>
## 3 Jupyter Notebook,Shell,TeX 21 days 8 days TRUE 2020-06-11
## 4 Julia,TeX 16 days 31 days TRUE 2019-03-15
## 5 C,Makefile,Perl 17 days 7 days TRUE 2019-04-10
## 6 Python 21 days 19 days TRUE 2016-05-12
## repo_updated repo_pushed repo_nbr_stars repo_language repo_license
## 1 2020-09-03 2019-08-08 13 Python mit
## 2 <NA> <NA> NA <NA> <NA>
## 3 2020-08-27 2020-08-27 2 Python bsd-3-clause
## 4 2020-09-08 2020-09-07 105 Julia mit
## 5 2020-02-26 2020-02-26 1 C gpl-3.0
## 6 2020-04-16 2020-04-16 11 Python mit
## repo_nbr_contribs repo_nbr_contribs_2ormore repo_info_obtained published.date
## 1 2 2 2020-09-08 2018-09-23
## 2 NA NA <NA> 2017-04-19
## 3 2 1 2020-09-08 2020-08-26
## 4 11 8 2020-09-08 2020-08-26
## 5 2 1 2020-09-08 2019-05-08
## 6 7 5 2020-09-08 2019-08-20
## halfyear nbr_authors
## 1 2018H2 3
## 2 2017H1 1
## 3 2020H2 6
## 4 2020H2 2
## 5 2019H1 4
## 6 2019H2 5
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] plotly_4.9.2.1 DT_0.15 jsonlite_1.7.1 purrr_0.3.4
## [5] gh_1.1.0 lubridate_1.7.9 ggplot2_3.3.2 tidyr_1.1.2
## [9] dplyr_1.0.2 rcrossref_1.0.0 tibble_3.0.3
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.5 lattice_0.20-41 assertthat_0.2.1 digest_0.6.25
## [5] mime_0.9 R6_2.4.1 plyr_1.8.6 evaluate_0.14
## [9] httr_1.4.2 pillar_1.4.6 rlang_0.4.7 lazyeval_0.2.2
## [13] curl_4.3 rstudioapi_0.11 data.table_1.13.0 miniUI_0.1.1.1
## [17] Matrix_1.2-18 rmarkdown_2.3 splines_4.0.2 urltools_1.7.3
## [21] labeling_0.3 stringr_1.4.0 htmlwidgets_1.5.1 triebeard_0.3.0
## [25] munsell_0.5.0 shiny_1.5.0 compiler_4.0.2 httpuv_1.5.4
## [29] xfun_0.16 pkgconfig_2.0.3 mgcv_1.8-31 htmltools_0.5.0
## [33] tidyselect_1.1.0 httpcode_0.3.0 fansi_0.4.1 viridisLite_0.3.0
## [37] crayon_1.3.4 withr_2.2.0 later_1.1.0.1 crul_1.0.0
## [41] grid_4.0.2 nlme_3.1-148 xtable_1.8-4 gtable_0.3.0
## [45] lifecycle_0.2.0 magrittr_1.5 scales_1.1.1 bibtex_0.4.2.2
## [49] cli_2.0.2 stringi_1.4.6 farver_2.0.3 promises_1.1.1
## [53] xml2_1.3.2 ellipsis_0.3.1 generics_0.0.2 vctrs_0.3.4
## [57] RColorBrewer_1.1-2 tools_4.0.2 glue_1.4.2 crosstalk_1.1.0.1
## [61] fastmap_1.0.1 yaml_2.2.1 colorspace_1.4-1 knitr_1.29